#######################
#Create elections data#
#######################

#Remember the caller's working directory; the script restores it with
#setwd(oldwd) at the very end.
oldwd = getwd()
#Set directory
#All file paths used below are relative to ./data
setwd('./data')

#Snapshot of the objects that exist before this script creates its own.
#It is taken after oldwd is defined, so oldwd counts as pre-existing and is
#kept by the final cleanup, which removes everything not listed here
#(except dapil_elections_all).
old = ls()

#2004 Election Data
#Wide layout: vote counts sit in the columns party1 ... party24
results_2004 = fread('./elections/dprd2_2004/dprd2_2004_results.csv')
#Drop V1 (presumably a row-index column left over from an earlier export)
results_2004[, V1 := NULL]

#Clean province names: collapse known misspellings onto the correct spelling
results_2004[provinsi %in% c("JAWA TENCAH", "JAWA TENGAR"), provinsi := "JAWA TENGAH"]
results_2004[provinsi %in% c("KALIMANTAN BARAR"), provinsi := "KALIMANTAN BARAT"]
results_2004[provinsi %in% c("NANGGORE ACEH DARUSSALAM"), provinsi := "NANGGROE ACEH DARUSSALAM"]
results_2004[provinsi %in% c("NURA TENGGARA TIMUR"), provinsi := "NUSA TENGGARA TIMUR"]
results_2004[provinsi %in% c("SUAWESI SELATAN", "SULAWESI SELETAN"), provinsi := "SULAWESI SELATAN"]
#"SUMATERA SELATAN\r" is Sumatera Selatan with a stray carriage return, not a
#misspelling of Sulawesi Selatan, so it must not be folded into that province.
#NOTE(review): confirm against the raw file that these rows really belong to
#Sumatera Selatan.
results_2004[provinsi %in% c("SUMATERA SELATAN\r"), provinsi := "SUMATERA SELATAN"]

#Standardized variable names
#Reshape to long: one row per original row and party. Every column that is
#not party1..party24 is carried along as an identifier.
use_vars = setdiff(names(results_2004), paste0('party', 1:24))
results_2004 = melt.data.table(results_2004, id.vars = use_vars, 
                               measure.vars = patterns('^party'),
                               value.name = 'total_votes',
                               variable.name = 'no_partai_politik')
#Keep only the party number ("party7" -> "7"); the result is a character column
results_2004[, no_partai_politik := str_extract(no_partai_politik, '\\d+')]
#A missing vote count is treated as zero votes
results_2004[is.na(total_votes), total_votes := 0]

#Rename to the column names used by the 2009 data. The dapil label is
#"<kabupaten> <dp_no>".
results_2004_clean = results_2004[, list(kab_code, no_partai_politik, total_votes = total_votes, 
                    province = provinsi, kabupaten, 
                    dapil = paste(kabupaten, dp_no),
                    meta_total_votes = total_original,
                    meta_total_seats = num_seats,
                    jumlah_total_votes = total_check
                    )]


#2009 Election Data
results_2009 = fread('./elections/dprd2_2009/dprd2_2009_results.csv')

#Columns 4-9 and 13-21 are converted to numeric after stripping thousands
#separators. Every comma has to go: str_replace() removes only the first one,
#which turns values such as "1,234,567" into NA.
num_cols = names(results_2009)[c(4:9, 13:21)]
results_2009[, (num_cols) := lapply(.SD, function(x) as.numeric(str_replace_all(x, ",", ""))), .SDcols = num_cols]

#Recompute the per-dapil totals from the party rows (stored as verify_<col>)
cols = c('total_votes', 'quota_seats', 'remainder_seats', 'remainder_rank', 'total_seats')

results_2009[, paste0('verify_', cols) := lapply(.SD, sum), by = list(province, kabupaten, dapil), .SDcols = cols]

#flags
#TRUE where a recomputed total disagrees with the total reported in the file
results_2009[, flag_total_votes_1 := verify_total_votes != meta_total_votes]
results_2009[, flag_total_votes_2 := verify_total_votes != jumlah_total_votes]
results_2009[, flag_quota_seats := verify_quota_seats != jumlah_quota_seats]
results_2009[, flag_remainder_seats := verify_remainder_seats  != jumlah_remainder_seats ]
results_2009[, flag_remainder_rank := verify_remainder_rank  != jumlah_rank_sum ]
results_2009[, flag_total_seats_1 := verify_total_seats != jumlah_total_seats ]
results_2009[, flag_total_seats_2 := verify_total_seats != meta_total_seats ]

#No errors
#flag_vars is collected before flag_count exists, so flag_count is not summed
#into itself. rowSums() has no na.rm, so a row with any NA flag gets an NA count.
flag_vars = names(results_2009) %>% .[str_detect(., '^flag_')]
results_2009[, flag_count := rowSums(.SD), .SDcols = flag_vars]
#Keep rows with no raised flag (count 0 or NA), a known seat total and a
#positive recomputed vote total
results_2009_clean = results_2009[flag_count %in% c(0,NA) & !is.na(meta_total_seats) & verify_total_votes > 0]

############################
#Check 2004 seat allocation#
############################

if (FALSE) {
  #Disabled block: never runs when the script is sourced. It re-derives the
  #2004 seat allocation per dapil: quota seats first, then the seats still
  #open go to the parties with the highest remainders.
  check_2004 = results_2004_clean[, {
    seats_available = unique(meta_total_seats)
    votes_cast = sum(total_votes, na.rm = TRUE)
    #Votes needed for one seat in the first round
    seat_price = round(votes_cast / seats_available)
    seats_round1 = floor(total_votes / seat_price)
    seats_left = seats_available - sum(seats_round1, na.rm = TRUE)
    leftover_votes = total_votes - seats_round1 * seat_price
    #Rank 1 = largest remainder
    leftover_rank = frankv(leftover_votes, order = -1L)
    seats_final = seats_round1 + (leftover_rank <= seats_left)
    seats_allocated = sum(seats_final)
    list(
      p_no = no_partai_politik,
      won_seats = seats_final,
      dapil_seats = seats_available,
      check_dapil_seats = seats_allocated,
      dapil_seats_flag = seats_available != seats_allocated
    )
  }, by = list(province, kabupaten, kab_code, dapil)]

  #Reshape
  #NOTE(review): the reshaped (wide) table is not assigned to anything, so the
  #CSV written below holds the long-format check_2004 - confirm this is intended.
  dcast.data.table(check_2004, province + kabupaten + kab_code + dapil + dapil_seats_flag + check_dapil_seats + dapil_seats ~ p_no, value.var = 'won_seats')
  #Save
  write.csv(check_2004, './elections/dprd2_2004/verify_2004_seat_allocation.csv', row.names = FALSE)

}

#######################
#Create treatment data# 
#######################

######
#2009#
######

#Party numbers (no_partai_politik) making up each party group in 2009.
#A party number listed in none of these vectors only enters the overall totals.
p_IT_2009 = c(8, 29, 27, 24)
p_IC_2009 = c(9, 13, 34, 18, 42)
p_ICIT_2009 = c(p_IT_2009, p_IC_2009)
p_NI_2009 = c(23, 28, 31, 1, 5, 39, 2, 7, 30, 4, 25, 6, 16, 26, 15, 11, 14,
              19, 10, 12, 21, 17, 22, 43, 3, 41, 32, 44, 33)

##########
#Collapse#
##########

#One result per dapil: vote concentration (enp), the simulated seat
#allocation, and the contest for the last remainder seat.
dapil_elections_2009 = results_2009_clean[, {
  p_no = no_partai_politik
  dapil_seats = unique(meta_total_seats)
  all_total_votes = sum(total_votes, na.rm = TRUE)

  #Which rows of this dapil belong to each party group
  is_NI = p_no %in% p_NI_2009
  is_ICIT = p_no %in% p_ICIT_2009
  is_IC = p_no %in% p_IC_2009
  is_IT = p_no %in% p_IT_2009

  #Vote shares; enp is 1 / sum of squared shares, overall and within each
  #group (group shares are rescaled to sum to one first)
  p_vs = total_votes / all_total_votes
  ni_vs = p_vs[is_NI]
  icit_vs = p_vs[is_ICIT]
  ic_vs = p_vs[is_IC]
  it_vs = p_vs[is_IT]
  enp = 1 / sum(p_vs^2)
  enp_NI = 1 / sum((ni_vs / sum(ni_vs, na.rm = TRUE))^2, na.rm = TRUE)
  enp_ICIT = 1 / sum((icit_vs / sum(icit_vs, na.rm = TRUE))^2, na.rm = TRUE)
  enp_IC = 1 / sum((ic_vs / sum(ic_vs, na.rm = TRUE))^2, na.rm = TRUE)
  enp_IT = 1 / sum((it_vs / sum(it_vs, na.rm = TRUE))^2, na.rm = TRUE)

  #Seat allocation: full quotas first, then the open seats go to the parties
  #with the highest remainders (rank 1 = largest remainder)
  quota = round(all_total_votes / dapil_seats)
  first_round_seats = floor(total_votes / quota)
  remaining_seats = dapil_seats - sum(first_round_seats, na.rm = TRUE)
  remainder = total_votes - (first_round_seats * quota)
  remainder_rank = frankv(remainder, order = -1L)
  won_seats = first_round_seats + (remainder_rank <= remaining_seats)

  #The last remainder seat: its winner holds rank remaining_seats, the first
  #party left without one holds the rank just below
  last_seat_win = remainder_rank %in% (remaining_seats)
  last_seat_lose = remainder_rank %in% (remaining_seats + 1)
  last_seat = remainder_rank %in% (remaining_seats + 0:1)
  close_parties = p_no[last_seat]
  winner_remainder = remainder[last_seat_win]
  loser_remainder = remainder[last_seat_lose]

  list(
    #Margin for the last seat as a share of all votes in the dapil
    bw = (winner_remainder - loser_remainder) / all_total_votes,
    enp = enp,
    enp_NI = enp_NI,
    enp_ICIT = enp_ICIT,
    enp_IC = enp_IC,
    enp_IT = enp_IT,
    remaining_seats = remaining_seats,
    remaining_seats_pct = remaining_seats / dapil_seats,
    party_last_seat_winner = p_no[last_seat_win],
    party_last_seat_loser = p_no[last_seat_lose],
    winner_first_round_seats = first_round_seats[last_seat_win],
    loser_first_round_seats = first_round_seats[last_seat_lose],
    winner_total_votes = total_votes[last_seat_win],
    loser_total_votes = total_votes[last_seat_lose],
    winner_remainder = winner_remainder,
    loser_remainder = loser_remainder,
    #Does a party of the group sit on either side of the last seat?
    NI_close = any(close_parties %in% p_NI_2009),
    ICIT_close = any(close_parties %in% p_ICIT_2009),
    IC_close = any(close_parties %in% p_IC_2009),
    IT_close = any(close_parties %in% p_IT_2009),
    all_total_votes = all_total_votes,
    meta_total_votes = sum(unique(meta_total_votes)),
    dapil_seats = dapil_seats,
    #Group totals: votes, first-round seats, final seats, remainders
    all_NI_vs = sum(total_votes[is_NI]),
    all_ICIT_vs = sum(total_votes[is_ICIT]),
    all_IC_vs = sum(total_votes[is_IC]),
    all_IT_vs = sum(total_votes[is_IT]),
    all_NI_fr_seats = sum(first_round_seats[is_NI]),
    all_ICIT_fr_seats = sum(first_round_seats[is_ICIT]),
    all_IC_fr_seats = sum(first_round_seats[is_IC]),
    all_IT_fr_seats = sum(first_round_seats[is_IT]),
    all_NI_seats = sum(won_seats[is_NI]),
    all_ICIT_seats = sum(won_seats[is_ICIT]),
    all_IC_seats = sum(won_seats[is_IC]),
    all_IT_seats = sum(won_seats[is_IT]),
    all_NI_remainder = sum(remainder[is_NI]),
    all_ICIT_remainder = sum(remainder[is_ICIT]),
    all_IC_remainder = sum(remainder[is_IC]),
    all_IT_remainder = sum(remainder[is_IT]),
    #Number of parties of each group with at least one vote
    IT_count = sum(total_votes[is_IT] > 0),
    NI_count = sum(total_votes[is_NI] > 0),
    IC_count = sum(total_votes[is_IC] > 0),
    ICIT_count = sum(total_votes[is_ICIT] > 0),
    quota = quota
  )
}, by = list(province, kabupaten, dapil, kab_code)]

#Close election type
#Pairwise indicators: both groups have a party on one side or the other of
#the last seat. The first four are products of logicals (integer 0/1);
#NIIC_IT is logical.
dapil_elections_2009[, `:=`(
  NI_ICIT = NI_close * ICIT_close,
  NI_IC = NI_close * IC_close,
  NI_IT = NI_close * IT_close,
  IC_IT = IC_close * IT_close,
  NIIC_IT = (NI_close | IC_close) & IT_close
)]

#Winner
#Group of the party that takes the last seat; the *_both columns require the
#winner and the first loser to come from the same set of parties.
dapil_elections_2009[, `:=`(
  NI_win = party_last_seat_winner %in% p_NI_2009,
  NI_both = party_last_seat_winner %in% p_NI_2009 &
    party_last_seat_loser %in% p_NI_2009,
  NIIC_both = party_last_seat_winner %in% c(p_NI_2009, p_IC_2009) &
    party_last_seat_loser %in% c(p_NI_2009, p_IC_2009),
  ICIT_win = party_last_seat_winner %in% p_ICIT_2009,
  IC_win = party_last_seat_winner %in% p_IC_2009,
  IT_win = party_last_seat_winner %in% p_IT_2009
)]
#Needs NI_win and IC_win to exist already, hence its own statement
dapil_elections_2009[, NIIC_win := (NI_win | IC_win)]




######
#2004#
######

#Party numbers (no_partai_politik) making up each party group in 2004.
#A party number listed in none of these vectors only enters the overall totals.
p_IT_2004 = c(16, 17, 3, 5)
p_IC_2004 = c(13, 15, 12)
p_ICIT_2004 = c(p_IT_2004, p_IC_2004)
p_NI_2004 = c(20, 18, 9, 14, 19, 6, 10, 21, 24, 4, 22, 8, 1, 11, 23, 2)


#One result per dapil: vote concentration (enp), the simulated seat
#allocation, and the contest for the last remainder seat.
dapil_elections_2004 = results_2004_clean[, {
  p_no = no_partai_politik
  dapil_seats = unique(meta_total_seats)
  all_total_votes = sum(total_votes, na.rm = TRUE)

  #Which rows of this dapil belong to each party group
  is_NI = p_no %in% p_NI_2004
  is_ICIT = p_no %in% p_ICIT_2004
  is_IC = p_no %in% p_IC_2004
  is_IT = p_no %in% p_IT_2004

  #Vote shares; enp is 1 / sum of squared shares, overall and within each
  #group (group shares are rescaled to sum to one first)
  p_vs = total_votes / all_total_votes
  ni_vs = p_vs[is_NI]
  icit_vs = p_vs[is_ICIT]
  ic_vs = p_vs[is_IC]
  it_vs = p_vs[is_IT]
  enp = 1 / sum(p_vs^2)
  enp_NI = 1 / sum((ni_vs / sum(ni_vs, na.rm = TRUE))^2, na.rm = TRUE)
  enp_ICIT = 1 / sum((icit_vs / sum(icit_vs, na.rm = TRUE))^2, na.rm = TRUE)
  enp_IC = 1 / sum((ic_vs / sum(ic_vs, na.rm = TRUE))^2, na.rm = TRUE)
  enp_IT = 1 / sum((it_vs / sum(it_vs, na.rm = TRUE))^2, na.rm = TRUE)

  #Seat allocation: full quotas first, then the open seats go to the parties
  #with the highest remainders (rank 1 = largest remainder)
  quota = round(all_total_votes / dapil_seats)
  first_round_seats = floor(total_votes / quota)
  remaining_seats = dapil_seats - sum(first_round_seats, na.rm = TRUE)
  remainder = total_votes - (first_round_seats * quota)
  remainder_rank = frankv(remainder, order = -1L)
  won_seats = first_round_seats + (remainder_rank <= remaining_seats)

  #The last remainder seat: its winner holds rank remaining_seats, the first
  #party left without one holds the rank just below
  last_seat_win = remainder_rank %in% (remaining_seats)
  last_seat_lose = remainder_rank %in% (remaining_seats + 1)
  last_seat = remainder_rank %in% (remaining_seats + 0:1)
  close_parties = p_no[last_seat]
  winner_remainder = remainder[last_seat_win]
  loser_remainder = remainder[last_seat_lose]

  list(
    #Margin for the last seat as a share of all votes in the dapil
    bw = (winner_remainder - loser_remainder) / all_total_votes,
    enp = enp,
    enp_NI = enp_NI,
    enp_ICIT = enp_ICIT,
    enp_IC = enp_IC,
    enp_IT = enp_IT,
    remaining_seats = remaining_seats,
    remaining_seats_pct = remaining_seats / dapil_seats,
    party_last_seat_winner = p_no[last_seat_win],
    party_last_seat_loser = p_no[last_seat_lose],
    winner_first_round_seats = first_round_seats[last_seat_win],
    loser_first_round_seats = first_round_seats[last_seat_lose],
    winner_total_votes = total_votes[last_seat_win],
    loser_total_votes = total_votes[last_seat_lose],
    winner_remainder = winner_remainder,
    loser_remainder = loser_remainder,
    #Does a party of the group sit on either side of the last seat?
    NI_close = any(close_parties %in% p_NI_2004),
    ICIT_close = any(close_parties %in% p_ICIT_2004),
    IC_close = any(close_parties %in% p_IC_2004),
    IT_close = any(close_parties %in% p_IT_2004),
    all_total_votes = all_total_votes,
    meta_total_votes = sum(unique(meta_total_votes)),
    dapil_seats = dapil_seats,
    #Group totals: votes, first-round seats, final seats, remainders
    all_NI_vs = sum(total_votes[is_NI]),
    all_ICIT_vs = sum(total_votes[is_ICIT]),
    all_IC_vs = sum(total_votes[is_IC]),
    all_IT_vs = sum(total_votes[is_IT]),
    all_NI_fr_seats = sum(first_round_seats[is_NI]),
    all_ICIT_fr_seats = sum(first_round_seats[is_ICIT]),
    all_IC_fr_seats = sum(first_round_seats[is_IC]),
    all_IT_fr_seats = sum(first_round_seats[is_IT]),
    all_NI_seats = sum(won_seats[is_NI]),
    all_ICIT_seats = sum(won_seats[is_ICIT]),
    all_IC_seats = sum(won_seats[is_IC]),
    all_IT_seats = sum(won_seats[is_IT]),
    all_NI_remainder = sum(remainder[is_NI]),
    all_ICIT_remainder = sum(remainder[is_ICIT]),
    all_IC_remainder = sum(remainder[is_IC]),
    all_IT_remainder = sum(remainder[is_IT]),
    #Number of parties of each group with at least one vote
    IT_count = sum(total_votes[is_IT] > 0),
    NI_count = sum(total_votes[is_NI] > 0),
    IC_count = sum(total_votes[is_IC] > 0),
    ICIT_count = sum(total_votes[is_ICIT] > 0),
    quota = quota
  )
}, by = list(province, kabupaten, kab_code, dapil)]

#Close election type
#Pairwise indicators (logical): both groups have a party on one side or the
#other of the last seat.
dapil_elections_2004[, `:=`(
  NI_ICIT = NI_close & ICIT_close,
  NI_IC = NI_close & IC_close,
  NI_IT = NI_close & IT_close,
  IC_IT = IC_close & IT_close,
  NIIC_IT = (NI_close | IC_close) & IT_close
)]

#Winner
#Group of the party that takes the last seat; the *_both columns require the
#winner and the first loser to come from the same set of parties.
dapil_elections_2004[, `:=`(
  NI_win = party_last_seat_winner %in% p_NI_2004,
  NI_both = party_last_seat_winner %in% p_NI_2004 &
    party_last_seat_loser %in% p_NI_2004,
  NIIC_both = party_last_seat_winner %in% c(p_NI_2004, p_IC_2004) &
    party_last_seat_loser %in% c(p_NI_2004, p_IC_2004),
  ICIT_win = party_last_seat_winner %in% p_ICIT_2004,
  IC_win = party_last_seat_winner %in% p_IC_2004,
  IT_win = party_last_seat_winner %in% p_IT_2004
)]
#Needs NI_win and IC_win to exist already, hence its own statement
dapil_elections_2004[, NIIC_win := (NI_win | IC_win)]


#Discrepancy records for 2004, reduced to the join columns plus ERROR.
#The dapil label is built the same way as in results_2004_clean.
#NOTE(review): provinsi is used here as read from the file, without the
#spelling clean-up applied to results_2004 - confirm the names line up.
discrepancies_2004 = fread('./elections/dprd2_2004/dprd2_2004_discrepancies.csv')
discrepancies_2004 = discrepancies_2004[, .(province = provinsi,
                                            kabupaten,
                                            kab_code,
                                            dapil = paste(kabupaten, dp_no),
                                            ERROR)]

#Key (and sort) both tables on the dapil identifiers
setkeyv(dapil_elections_2004, c('province', 'kabupaten', 'kab_code', 'dapil'))
setkeyv(discrepancies_2004, c('province', 'kabupaten', 'kab_code', 'dapil'))

#Keyed join that keeps every row of dapil_elections_2004; ERROR is NA for a
#dapil with no discrepancy record
dapil_elections_2004 = discrepancies_2004[dapil_elections_2004]


#Combine elections data:

#The 2009 rows get ERROR = 0 so both tables carry the same columns
dapil_elections_2009[, ERROR := 0]
dapil_elections_2004[, election_cycle := 2004]
dapil_elections_2009[, election_cycle := 2009]

#Columns are matched by name because the two tables order them differently
dapil_elections_all = rbindlist(list(dapil_elections_2004, dapil_elections_2009), use.names = TRUE)

#Remove everything this script created except the combined table. The list of
#names is passed straight to rm(): storing it in a variable first would leave
#that variable behind, because it does not exist yet when ls() is evaluated.
rm(list = setdiff(ls(), c(old, 'dapil_elections_all')))

#Restore the caller's working directory (oldwd is part of the `old` snapshot,
#so it is still available here)
setwd(oldwd)